{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Cdje0635tAKQ"
   },
   "outputs": [],
   "source": [
    "########################################################################################\n",
    "########################################################################################\n",
    "#Replication file for:\n",
    "#The Importance of a Liberal Power's Attention to Democratic Elections Around the World\n",
    "#Johannes Bubeck Ashrakat Elshehawy Nikolay Marinov Federico Nanni\n",
    "########################################################################################\n",
    "########################################################################################"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "hNspXYjztwiE",
    "outputId": "2a5b6a73-d074-4bd9-e6d3-4a4871b1e9b5"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_72664/3551500226.py:6: DtypeWarning: Columns (7,11,12,13) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  data = pd.read_csv(\"/Users/ashrakatelshehawy/Dropbox/Working Papers and Submissions/How the US Electoral Cycle Affects Elections Around the World/Replication JOP/Replication JOP Oct 2023/Preparation/To send/Data/eval.csv\")\n"
     ]
    }
   ],
   "source": [
    "#import pandas\n",
    "import pandas as pd\n",
    "\n",
    "#read data\n",
    "#please change your working directory\n",
    "data = pd.read_csv(\"/Data/eval.csv\")\n",
    "\n",
    "#check ones that have been evaluated\n",
    "annotated_data = data.loc[data['check'] == 1]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "BEsxS2Ott94r",
    "outputId": "ee11bda3-ad53-476e-a2da-75c62191d111"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PRECISION\n",
      "speech 0.47800659657802524\n",
      "245\n",
      "bill 0.5577342047930283\n",
      "153\n",
      "law 0.4789272030651341\n",
      "87\n",
      "paper 0.5043859649122806\n",
      "76\n",
      "order 0.7040229885057472\n",
      "58\n",
      "###########\n",
      "MAP\n",
      "speech 0.714697854229104\n",
      "52\n",
      "bill 0.6527777777777778\n",
      "12\n",
      "law 0.8\n",
      "5\n",
      "paper 0.825\n",
      "10\n",
      "order 1.0\n",
      "4\n"
     ]
    }
   ],
   "source": [
    "#save election ids\n",
    "election_ids = set(annotated_data[\"var1\"])\n",
    "\n",
    "#these are the document types\n",
    "docs = [\"speech\",\"bill\",\"law\",\"paper\",\"order\"]\n",
    "\n",
    "# to store all results for the different types of docs\n",
    "all_p = {x:[] for x in docs}\n",
    "all_maps = {x:[] for x in docs}\n",
    "\n",
    "# for each election id\n",
    "for election in election_ids:\n",
    "    # we retrieve the results\n",
    "    res = annotated_data.loc[annotated_data['var1'] == election]\n",
    "\n",
    "    # for each type of doc\n",
    "    for doc in docs:\n",
    "        # we keep only relevant docs\n",
    "        doc_res = res.loc[res[doc] == 1]\n",
    "        if doc_res.empty:\n",
    "            continue\n",
    "\n",
    "        else:\n",
    "            # we sort the ranking\n",
    "            doc_res = doc_res.sort_values('u', ascending=False)\n",
    "            # we compute precision for each document\n",
    "            p = sum(doc_res[\"precise\"])/len(doc_res)\n",
    "            # and the add this to this list to sum it up at the end\n",
    "            all_p[doc].append(p)\n",
    "\n",
    "            # we compute MAP only in settings where we have more than 1 document retrieved and at least one relevant and one irrelevant\n",
    "            # this way we cann assess the quality of the ranking\n",
    "            if len(doc_res[\"precise\"])>1 and len(set(doc_res[\"precise\"]))>1:\n",
    "\n",
    "                positive = 0\n",
    "                avgP = []\n",
    "                # in this for loop we compute the average precision of each relevant doc\n",
    "                # see here: https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision\n",
    "                for r in range(len(doc_res[\"precise\"])):\n",
    "                    if doc_res[\"precise\"].iloc[r]== 1:\n",
    "                        positive += 1\n",
    "                        avgP.append(positive/(r+1.0))\n",
    "                avgP = sum(avgP)/len(avgP)\n",
    "                all_maps[doc].append(avgP)\n",
    "\n",
    "#this is the output that is in the table\n",
    "print (\"PRECISION\")\n",
    "for doc,score in all_p.items():\n",
    "    print (doc,sum(score)/len(score))\n",
    "    print (len(score))\n",
    "\n",
    "print (\"###########\")\n",
    "print (\"MAP\")\n",
    "for doc,score in all_maps.items():\n",
    "    print (doc,sum(score)/len(score))\n",
    "    print (len(score))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
